User:PMBot/Code

The following is the preliminary source code. It reads topic page names from Proteopedia:Topic Pages and outputs a list of what would be written on a first pass. Only topic pages are read; no structure pages are changed. from __future__ import with_statement # This isn't required in Python 2.6 """ pmbot [OPTIONS] Goes through all topic pages looking for the usage of non-uploaded structure pages (official PDBs) in scenes. Each of these PDB pages is edited such that it contains in the section named "About this Structure" the string
 * 1) -*- coding: utf-8 -*-

"The page TOPICPAGE refers to 1ABC." or "The pages TOPICPAGES refer to 1ABC."

where TOPICPAGE is a topic page link, TOPICPAGES is a comma-separated list of topic page links, and 1ABC is the name of the respective structure page. If such a string already exists, it is updated.

Options: At the moment, there are no options.

""" __version__ = '0.10'
 * 1) (C) R Stephan 2009
 * 2) Distributed under the terms of the GPL2.
 * 1) Distributed under the terms of the GPL2.
 * 1) Distributed under the terms of the GPL2.

import wikipedia,re,sys,config import catlib,traceback,itertools

wikipedia.get_throttle.setDelay(5)
 * 1) wikipedia.put_throttle.setDelay(10)

msg={ 'en': 'pmbot: maintenance of structure references', }

# main: build a map from structure-page names to the topic pages that mention
# them, then print the result. Nothing is written back to the wiki here.
#
# Visible flow:
#   1. Load the page 'Proteopedia:Topic_Pages'; on wikipedia.NoPage print a
#      message and return.
#   2. Locate the topic list inside that page with Rta, run Rco.sub over it,
#      and take every [[...]] link in it (Rt1) as a topic; Rt2 strips an
#      optional " |label" suffix from the link text.
#   3. For every topic, fetch its text and collect candidate names from three
#      sources: [[...]] links (Rt1), the four characters following
#      "STRUCTURE_" (Rst: one of 1-9 followed by three alphanumerics), and
#      whatever Rap matches.
#   4. A candidate whose first character is in '1'..'9' is treated as a
#      structure name; the ASCII/xmlcharref-encoded topic name is added to the
#      set stored in `dic` under the lower-cased candidate.
#   5. Print the number of topics read, the number of structures, and `dic`.
#
# NOTE(review): this listing is damaged and does not parse as shown.
#   - Line breaks and indentation are collapsed, and call parentheses appear
#     to have been dropped (`def main:`, `wikipedia.getSite`, `topic.get`,
#     `set`, `sys.stdout.flush`, `f.read`).
#   - Rco is compiled from an empty pattern as shown, which makes Rco.sub a
#     no-op; the real pattern was presumably lost in publishing - confirm
#     against the original script.
#   - The Rap literal is cut off after "(?<=" and no definition of Rta is
#     visible although Rta is used below; both must be restored from the
#     original script, they cannot be reconstructed from this text.
# NOTE(review): `string` (and `codecs` in the commented-out file fallback) is
#   not among the imports visible in this listing.
# NOTE(review): `linkname in dic` tests the name as spelled, but keys are
#   stored lower-cased, so a mixed-case name starts a fresh set and replaces
#   the set already stored under the lower-cased key.
# NOTE(review): `linkname[0]` raises IndexError if stripping the "|label"
#   part leaves an empty name (e.g. a link that starts with "|").
def main: Rco = re.compile (u'') Rt1 = re.compile (u'(?<=\[\[)[^\]]+(?=\]\])') Rt2 = re.compile (u' *\|.*') Rst = re.compile (u'(?<=STRUCTURE_)[1-9][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]') Rap = re.compile (u'(?<=", re.DOTALL)  site = wikipedia.getSite  dic = {}

# Step 1: fetch the list of topic pages. The two list items that follow this
# paragraph are a commented-out alternative that reads the list from a local
# file 'Topic_Pages.txt' instead.
pagename = 'Proteopedia:Topic_Pages' alltopics = wikipedia.Page (site, pagename) try: temp_text = alltopics.get (False, True) except wikipedia.NoPage: print 'NoPage exception when trying to read topic page list' return
 * 1) Try to read a topic pages list.
 * 1)  with codecs.open('Topic_Pages.txt', encoding='utf-8') as f: temp_text = f.read

# Step 2: cut out the marked topic list and iterate over its [[...]] links.
m = Rta.search(temp_text) if m == None: print 'Topic list markers not found.' return alltopics_text = Rco.sub (u'', m.group(0)) topicsIter = Rt1.finditer (alltopics_text)

# Steps 3-5: per-topic retry loop. NoPage abandons the topic; SectionError
# strips the "#section" part of the name and retries; IsRedirectPage retries
# with the redirect target taken from inst.args[0]. The two list items after
# this paragraph are commented-out debugging aids (stop after a few topics,
# dump the topic text).
c = 0 for topicmatch in topicsIter: c = c+1 t = topicmatch.group(0) topicname = Rt2.sub (u'', t)   # TODO: check if already loaded before loaded = False; while not loaded: sys.stdout.flush print 'Retrieving ' + topicname.encode ('ascii', 'xmlcharrefreplace') sys.stdout.flush topic = wikipedia.Page (site, topicname) try: loaded = True topic_text = topic.get except wikipedia.NoPage: print 'NoPage exception when trying to read ' + topicname.encode ('ascii', 'xmlcharrefreplace') loaded = False break except wikipedia.SectionError: print 'Subject does not exist: ' + topicname.encode ('ascii', 'xmlcharrefreplace') topicname = re.sub (ur"#.*", '', topicname) loaded = False continue except wikipedia.IsRedirectPage, inst: topicname = inst.args[0] print 'Redirected to ' + topicname.encode ('ascii', 'xmlcharrefreplace') loaded = False continue if not loaded: continue links = itertools.chain (       Rt1.finditer (topic_text),        Rst.finditer (topic_text),        Rap.finditer (topic_text)) for linkmatch in links: l = linkmatch.group(0) linkname = Rt2.sub ('', l)     if linkname[0]>'0' and linkname[0]<='9': if linkname in dic: s = dic[string.lower(linkname)] else: s = set s.add (topicname.encode('ascii', 'xmlcharrefreplace')) dic[string.lower(linkname)] = s print 'Number of topics read: ', c  print 'Number of structures to read/write: ', len(dic) sys.stdout.flush print dic
 * 1)    if c>2: break
 * 1)    print topic_text.encode('utf-8')

if __name__ == '__main__':
    # Script entry point: hand the command line to the bot framework, run the
    # bot exactly once, and always shut the framework down afterwards.
    #
    # - TODO: flag to switch from applet to scene backlinks to link backlinks
    # - TODO: add option to search scene files
    # - TODO: option to restrict number of topics read (c)

    # presumably handleArgs() consumes the framework's own options and returns
    # the remaining arguments - confirm against the pywikipedia documentation.
    for arg in wikipedia.handleArgs():
        # No script-specific options exist yet. The planned "-p:" option
        # (predicate text, defaulting to "refers to") is kept for reference:
        #    if arg.startswith("-p:"):
        #      if (len(arg)) == len("-p:"):
        #        pred = u"refers to"
        #      else:
        #        pred = arg[len("-p:"):]
        pass

    # main() is deliberately outside the argument loop: nested inside it, the
    # bot would run once per leftover argument and never when there are none,
    # which is the normal case while the script has no options.
    try:
        main()
    except Exception:
        # Top-level boundary: report any failure with its traceback.
        # KeyboardInterrupt is no longer swallowed here, so Ctrl-C still
        # aborts the run (the cleanup below runs either way).
        print('Something wrong.')
        traceback.print_exc()
    finally:
        print('Stop.')
        wikipedia.stopme()